FIGURE 1 CELL CLUSTERING, ANALYSIS, AND ANNOTATION
# creates a Seurat object
day7_obj <- CreateSeuratObject(day7_raw_data)
# demultiplex individuals
# add vireo information (demultiplex to identify individuals)
add_vireo <- function(day7_obj, dir) {
donor_ids <- read.table(paste0(dir, 'vireo/donor_ids.tsv'), header = TRUE, stringsAsFactors = FALSE)
day7_obj <- AddMetaData(day7_obj, donor_ids$donor_id, col.name ='vireo.individual')
day7_obj <- AddMetaData(day7_obj, donor_ids$prob_max, col.name ='vireo.prob.singlet')
ambient.file <- paste0(dir, 'vireo/prop_ambient.tsv')
if (file.exists(ambient.file)) {
prop <- read.table(paste0(dir, 'vireo/prop_ambient.tsv'), header = TRUE, stringsAsFactors = FALSE)
inds <- unique(donor_ids$best_singlet)
donor_ids$prop_donor <- 0
for (i in 1:length(inds)) {
w <- which(donor_ids$best_singlet==inds[i])
donor_ids$prop_donor[w] <- prop[w,inds[i]]
}
day7_obj <- AddMetaData(day7_obj, donor_ids$prop_donor, col.name ='vireo.prop.donor')
}
day7_obj
}
day7_obj <- add_vireo(day7_obj, dir)
individual.colname <- ('vireo.individual')
individual.colname <- individual.colname[which(individual.colname %in% colnames(day7_obj@meta.data))][1]
# print each object's information on cells and features
NA19114_obj <- subset(x = day7_obj, subset = vireo.individual == "NA19114")
NA19130_obj <- subset(x = day7_obj, subset = vireo.individual == "NA19130")
NA19152_obj <- subset(x = day7_obj, subset = vireo.individual == "NA19152")
doublet_obj <- subset(x = day7_obj, subset = vireo.individual == "doublet")
unassigned_obj <- subset(x = day7_obj, subset = vireo.individual == "unassigned")
paste("NA19114:")
## [1] "NA19114:"
print( NA19114_obj)
## An object of class Seurat
## 36601 features across 2988 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
cat(paste('\n','NA19130:'))
##
## NA19130:
print(NA19130_obj)
## An object of class Seurat
## 36601 features across 3264 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
cat(paste('\n','NA19152:'))
##
## NA19152:
print(NA19152_obj)
## An object of class Seurat
## 36601 features across 3206 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
cat(paste('\n','doublet:'))
##
## doublet:
print(doublet_obj)
## An object of class Seurat
## 36601 features across 710 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
cat(paste('\n','unassigned:'))
##
## unassigned:
print(unassigned_obj)
## An object of class Seurat
## 36601 features across 50 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
cat(paste('\n','total:'))
##
## total:
print(day7_obj)
## An object of class Seurat
## 36601 features across 10218 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
# filtering out unassigned and doublet cells, and cells with less than a certain number of features
day7_obj <- subset(x = day7_obj, subset = vireo.individual == "doublet", invert = TRUE)
day7_obj <- subset(x = day7_obj, subset = vireo.individual == "unassigned", invert = TRUE)
day7_obj <- subset(day7_obj, subset = nFeature_RNA >= 1500)
# print object total cells and features (post-filtering)
day7_obj
## An object of class Seurat
## 36601 features across 8936 samples within 1 assay
## Active assay: RNA (36601 features, 0 variable features)
# normalization, variance stabilization, and dimensionality reduction
day7_sct_obj <- SCTransform(day7_obj, variable.features.n = 5000, verbose = FALSE)
day7_sct_obj <- RunPCA(day7_sct_obj, npcs = 50, verbose = FALSE)
day7_sct_obj <- RunUMAP(day7_sct_obj, dims = 1:50, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
# break
day7_sct_obj <- FindNeighbors(day7_sct_obj, dims = 1:50, verbose = FALSE)
# prepare color palettes
umap_colors <- c("#117733", #green
"#AA4499", #purple
"#CC6677", #rose
"#999933", #olive
"#4477AA", #blue
"#332288", #indigo
"#C45F00", #orange
"#882255", #wine
"#44AA99", #teal
"#DDCC77") #sand
bar_colors <- c("#C45F00", #orange
"#882255", #wine
"#DDCC77", #sand
"#44AA99", #teal
"#332288", #indigo
"#AA4499", #purple
"#117733", #green
"#4477AA", #blue
"#999933", #olive
"#CC6677") #rose
# set cluster resolution
day7_sct_obj <- FindClusters(day7_sct_obj, resolution = 0.3, verbose = FALSE)
# change order of clusters
my_levels <- c( '0', '3', '2', '1', '6', '7', '8', '4', '5', '9')
day7_sct_obj@active.ident <- factor(x = day7_sct_obj@active.ident, levels = my_levels)
#name clusters
new_cluster_ids <- c("Cardiomyocytes", "Second heart field", "Fibroblasts", "Epicardial cells", "First heart field", "Endocardial cells", "Hepatic endoderm", "Foregut endoderm", "Neuroectoderm", "Pluripotent stem cells")
names(new_cluster_ids) <- levels(day7_sct_obj)
day7_sct_obj <- RenameIdents(day7_sct_obj, new_cluster_ids)
day7_sct_obj$cell_type <- Idents(day7_sct_obj)
# plot umap
day7_umap <- DimPlot(day7_sct_obj, reduction = "umap", cols = umap_colors) + NoLegend() +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2")
day7_totals <- as.data.frame(table(day7_sct_obj@active.ident))
day7_totals <- day7_totals %>% mutate(Percent = scales::label_percent(accuracy = .1)(Freq / sum(Freq)))
day7_totals <- day7_totals %>%
rename("Type" = "Var1",
"Percent" = "Percent",
"Count" = "Freq")
day7_totals$Type <- factor(day7_totals$Type, levels = c(
"Hepatic endoderm",
"Foregut endoderm",
"Pluripotent stem cells",
"Neuroectoderm",
"Endocardial cells",
"Second heart field",
"Cardiomyocytes",
"First heart field",
"Epicardial cells",
"Fibroblasts"))
day7_bar <- ggplot(day7_totals, aes(x = Type, y = Count, fill = Type)) +
geom_bar(stat = "identity") +
coord_flip(clip = "off") +
geom_text(aes(label = Percent), size = 2, hjust = -0.01) +
scale_fill_manual(values = bar_colors) +
NoLegend() +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y = element_text(size = 9, color = "black"),
axis.text.x = element_text(size = 8, colour = "black"),
panel.background = element_blank())
# setup layout for printing multiple plots
umap_layout <- c(area(1, 1), area(1, 2))
#plot umap and cell composition
p <- day7_umap + day7_bar + plot_layout(design = umap_layout)
p
# plot marker gene expression by cluster
features <- c("POU5F1", "L1TD1",
"SOX2", "CRABP1",
"HHEX", "FOXA2",
"AFP",
"KDR", "CDH5", "PECAM1", "NFATC1",
"TBX5", "TBX20",
"WT1", "TBX18", "BMP4",
"COL3A1", "LUM", "POSTN",
"EYA1", "ISL1", "FGF10",
"TNNT2", "ACTN2", "RYR2", "CACNA1C", "TNNI3")
p <- DotPlot(day7_sct_obj, col.min = 0, features = features, cols = c("light gray", "dark red"),) +
theme_linedraw() +
theme(
legend.direction = "horizontal",
legend.position = "bottom",
legend.title=element_text(size=9),
legend.text=element_text(size=9),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 60, vjust = 0, hjust=0),
axis.text.y = element_text(colour = umap_colors, face = "bold")) +
scale_x_discrete(position = "top")
p
NA19114_obj <- subset(x = day7_sct_obj, subset = vireo.individual == "NA19114")
NA19130_obj <- subset(x = day7_sct_obj, subset = vireo.individual == "NA19130")
NA19152_obj <- subset(x = day7_sct_obj, subset = vireo.individual == "NA19152")
# plot cell line NA19114
NA19114_umap <- DimPlot(NA19114_obj, reduction = "umap", cols = umap_colors) + NoLegend() +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2")
NA19114_totals <- as.data.frame(table(NA19114_obj@active.ident))
NA19114_totals <- NA19114_totals %>% mutate(Percent = scales::label_percent(accuracy = .1)(Freq / sum(Freq)))
NA19114_totals <- NA19114_totals %>%
rename("Type" = "Var1",
"Percent" = "Percent",
"Count" = "Freq")
NA19114_totals$Type <- factor(NA19114_totals$Type, levels = c(
"Hepatic endoderm",
"Foregut endoderm",
"Pluripotent stem cells",
"Neuroectoderm",
"Endocardial cells",
"Second heart field",
"Cardiomyocytes",
"First heart field",
"Epicardial cells",
"Fibroblasts"))
NA19114_bar <- ggplot(NA19114_totals, aes(x = Type, y = Count, fill = Type)) +
geom_bar(stat = "identity") +
coord_flip(clip = "off") +
geom_text(aes(label = Percent), size = 2, hjust = -0.01) +
scale_fill_manual(values = bar_colors) +
NoLegend() +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y = element_text(size = 9, color = "black"),
axis.text.x = element_text(size = 8, colour = "black"),
panel.background = element_blank())
p <- NA19114_umap + NA19114_bar + plot_layout(design = umap_layout)
p
# plot cell line NA19130
NA19130_umap <- DimPlot(NA19130_obj, reduction = "umap", cols = umap_colors) + NoLegend() +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2")
NA19130_totals <- as.data.frame(table(NA19130_obj@active.ident))
NA19130_totals <- NA19130_totals %>% mutate(Percent = scales::label_percent(accuracy = .1)(Freq / sum(Freq)))
NA19130_totals <- NA19130_totals %>%
rename("Type" = "Var1",
"Percent" = "Percent",
"Count" = "Freq")
NA19130_totals$Type <- factor(NA19130_totals$Type, levels = c(
"Hepatic endoderm",
"Foregut endoderm",
"Pluripotent stem cells",
"Neuroectoderm",
"Endocardial cells",
"Second heart field",
"Cardiomyocytes",
"First heart field",
"Epicardial cells",
"Fibroblasts"))
NA19130_bar <- ggplot(NA19130_totals, aes(x = Type, y = Count, fill = Type)) +
geom_bar(stat = "identity") +
coord_flip(clip = "off") +
geom_text(aes(label = Percent), size = 2, hjust = -0.01) +
scale_fill_manual(values = bar_colors) +
NoLegend() +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y = element_text(size = 9, color = "black"),
axis.text.x = element_text(size = 8, colour = "black"),
panel.background = element_blank())
p <- NA19130_umap + NA19130_bar + plot_layout(design = umap_layout)
p
# plot cell line NA19152
NA19152_umap <- DimPlot(NA19152_obj, reduction = "umap", cols = umap_colors) + NoLegend() +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2")
NA19152_totals <- as.data.frame(table(NA19152_obj@active.ident))
NA19152_totals <- NA19152_totals %>% mutate(Percent = scales::label_percent(accuracy = .1)(Freq / sum(Freq)))
NA19152_totals <- NA19152_totals %>%
rename("Type" = "Var1",
"Percent" = "Percent",
"Count" = "Freq")
NA19152_totals$Type <- factor(NA19152_totals$Type, levels = c(
"Hepatic endoderm",
"Foregut endoderm",
"Pluripotent stem cells",
"Neuroectoderm",
"Endocardial cells",
"Second heart field",
"Cardiomyocytes",
"First heart field",
"Epicardial cells",
"Fibroblasts"))
NA19152_bar <- ggplot(NA19152_totals, aes(x = Type, y = Count, fill = Type)) +
geom_bar(stat = "identity") +
coord_flip(clip = "off") +
geom_text(aes(label = Percent), size = 2, hjust = -0.01) +
scale_fill_manual(values = bar_colors) +
NoLegend() +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y = element_text(size = 9, color = "black"),
axis.text.x = element_text(size = 8, colour = "black"),
panel.background = element_blank())
p <- NA19152_umap + NA19152_bar + plot_layout(design = umap_layout)
p
# subset out foregut populations for subclustering for supplemental
gut <- subset(x = day7_sct_obj, subset = seurat_clusters == 4)
# normalization, variance stabilization, and dimensionality reduction
gut <- SCTransform(gut, variable.features.n = 5000, verbose = FALSE)
gut <- RunPCA(gut, npcs = 50, verbose = FALSE)
gut <- RunUMAP(gut, dims = 1:50, verbose = FALSE)
# break
gut <- FindNeighbors(gut, dims = 1:50, verbose = FALSE)
gut <- FindClusters(gut, resolution = 0.1, verbose = FALSE)
gut_cluster_ids <- c("Anterior foregut", "Posterior foregut")
names(gut_cluster_ids) <- levels(gut)
gut <- RenameIdents(gut, gut_cluster_ids)
# plot gut umap
p <- DimPlot(gut, reduction = "umap", cols = c("#0077BB", "#EE7733")) +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2")
p
SOX2 <- VlnPlot(gut, "SOX2", cols = c("#0077BB", "#EE7733")) + NoLegend() + theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 10),
axis.title.y = element_text(size=12))
ISL1 <- VlnPlot(gut, "ISL1", cols = c("#0077BB", "#EE7733")) + NoLegend() + theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 10),
axis.title.y = element_blank())
IRX3 <- VlnPlot(gut, "IRX3", cols = c("#0077BB", "#EE7733")) + NoLegend() + theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 10),
axis.title.y = element_blank())
HNF4A <- VlnPlot(gut, "HNF4A", cols = c("#0077BB", "#EE7733")) + NoLegend() + theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 10),
axis.title.y = element_text(size=12))
PROX1 <- VlnPlot(gut, "PROX1", cols = c("#0077BB", "#EE7733")) + NoLegend() + theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 10),
axis.title.y = element_blank())
FGB <- VlnPlot(gut, "FGB", cols = c("#0077BB", "#EE7733")) + NoLegend() + theme(axis.title.x = element_blank(),
axis.text.x = element_text(size = 10),
axis.title.y = element_blank())
# setup layout for printing multiple plots
gut_layout <- c(area(1, 1), area(1, 2), area(1, 3))
# anterior foregut markers = SOX2, ISL1, IRX3
p <- SOX2 + ISL1 + IRX3 + plot_layout(design = gut_layout)
p
# posterior foregut markers = HNF4A, PROX1, FGB
p <- HNF4A + PROX1 + FGB + plot_layout(design = gut_layout)
p
FIGURE 2 PLOTTING OF GUIDED DIFFERENTIATION AND 16-DAY TIME-COURSE INTEGRATION
# NOTE: the following code chunk was run outside of the Rmarkdown file as a batch job due to high computational requirements and long processing time - the resulting Seurat object is used in the next chunk
# guided differentiation culture referred to as cardiac EB as a variable
# create Seurat object
eb_obj <- CreateSeuratObject(counts = eb_raw_data)
# add vireo information (demultiplex to identify individuals)
add_vireo <- function(eb_obj, dir) {
donor_ids <- read.table(paste0(dir, 'vireo/donor_ids.tsv'), header = TRUE, stringsAsFactors = FALSE)
eb_obj <- AddMetaData(eb_obj, donor_ids$donor_id, col.name ='vireo.individual')
eb_obj <- AddMetaData(eb_obj, donor_ids$prob_max, col.name ='vireo.prob.singlet')
ambient.file <- paste0(dir, 'vireo/prop_ambient.tsv')
if (file.exists(ambient.file)) {
prop <- read.table(paste0(dir, 'vireo/prop_ambient.tsv'), header = TRUE, stringsAsFactors = FALSE)
inds <- unique(donor_ids$best_singlet)
donor_ids$prop_donor <- 0
for (i in 1:length(inds)) {
w <- which(donor_ids$best_singlet==inds[i])
donor_ids$prop_donor[w] <- prop[w,inds[i]]
}
eb_obj <- AddMetaData(eb_obj, donor_ids$prop_donor, col.name ='vireo.prop.donor')
}
eb_obj
}
eb_obj <- add_vireo(eb_obj, dir)
individual.colname <- ('vireo.individual')
individual.colname <- individual.colname[which(individual.colname %in% colnames(eb_obj@meta.data))][1]
# remove doublet and unassigned cells
eb_obj <- subset(x = eb_obj, subset = vireo.individual == "doublet", invert = TRUE)
eb_obj <- subset(x = eb_obj, subset = vireo.individual == "unassigned", invert = TRUE)
# filter cells with less than 1500 genes
eb_obj <- subset(eb_obj, subset = nFeature_RNA >= 1500)
# iPSC timecourse
# filter cells according to Elorbany publications analysis
ips_obj <- CreateSeuratObject(counts = ips_obj@assays$RNA@counts, min.cells = 10, min.features = 300)
# filter out cells with more than 25% mtDNA
ips_obj[["percent.mt"]] <- PercentageFeatureSet(ips_obj, pattern = "^MT-")
ips_obj <- subset(ips_obj, subset = percent.mt <= 25)
# filter out cells with doublet probability greater than 0.3
ips_obj <- filter(ips_obj, PRB.DBL <= 0.3)
# remove all ambiguous cells
ips_obj <- filter(ips_obj, !grepl('AMB', BEST))
# filter cells that have features or reads above or below 4 s.d. from the median
count.max <- round(median(ips_obj$nCount_RNA) + 4 * sd(ips_obj$nCount_RNA), digits = -2)
count.min <- round(median(ips_obj$nCount_RNA) - 4 * sd(ips_obj$nCount_RNA), digits = -2)
feat.max <- round(median(ips_obj$nFeature_RNA) + 4 * sd(ips_obj$nFeature_RNA), digits = -2)
feat.min <- round(median(ips_obj$nFeature_RNA) - 4 * sd(ips_obj$nFeature_RNA), digits = -2)
ips_obj <- subset(ips_obj, subset = nFeature_RNA > feat.min & nFeature_RNA < feat.max & nCount_RNA < count.max & nCount_RNA > count.min)
# Combine objects and format
# add origin metadata
ips_obj <- AddMetaData(ips_obj, metadata="Time-course", col.name="Origin")
eb_obj <- AddMetaData(eb_obj, metadata="Guided differentiation", col.name="Origin")
# make list of two objects
int_obj <- merge(ips_obj, y = eb_obj, add.cell.ids = c("IPSC", "EB"), project = "compare")
# the cardiac eb has no "day" that it was harvested, replacing with EB
int_obj@meta.data$Day <- int_obj@meta.data$Day %>% replace_na('EB')
# Run sctransform and integrate objects
int_obj <- SplitObject(int_obj, split.by = "Origin")
int_obj <- lapply(X = int_obj, FUN = SCTransform, variable.features.n = 5000)
features <- SelectIntegrationFeatures(object.list = int_obj, nfeatures = 5000)
int_obj <- PrepSCTIntegration(object.list = int_obj, anchor.features = features)
anchors <- FindIntegrationAnchors(object.list = combined_obj, normalization.method = "SCT", anchor.features = features)
int_obj <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
DefaultAssay(int_obj) <- "integrated"
int_obj <- RunPCA(int_obj, npcs = 50)
int_obj <- RunUMAP(int_obj, dims = 1:50)
int_obj <- FindNeighbors(int_obj, dims = 1:50)
#change order of day-levels so they are listed chronologically
int_obj$Day <- factor(int_obj$Day, levels = c('Day 0', 'Day 1', 'Day 3', 'Day 5', 'Day 7', 'Day 11', 'Day 15', 'EB'))
# cluster and prepare to run DEG on SCT assay with multiple models
int_obj <- FindClusters(int_obj, resolution = 0.1)
int_obj <- PrepSCTFindMarkers(int_obj)
# prepare color palette
my_seven_colors <- c("#117733", "#882255", "#CC6677", "#999933", "#44AA99", "#332288", "#AA4499")
# plot UMAP split origin
DimPlot(int_obj, group.by = 'Origin', cols = c("orange", "light blue"), order = FALSE, raster = FALSE) +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2") + NoLegend()
# plot UMAPs by day
# highlight ipsc-cms day 0 clusters
cells0_0 <- Cells(subset(x = int_obj, subset = Day == "Day 0" & seurat_clusters == 0))
cells0_1 <- Cells(subset(x = int_obj, subset = Day == "Day 0" & seurat_clusters == 1))
cells0_2 <- Cells(subset(x = int_obj, subset = Day == "Day 0" & seurat_clusters == 2))
cells0_3 <- Cells(subset(x = int_obj, subset = Day == "Day 0" & seurat_clusters == 3))
cells0_4 <- Cells(subset(x = int_obj, subset = Day == "Day 0" & seurat_clusters == 4))
cells0_5 <- Cells(subset(x = int_obj, subset = Day == "Day 0" & seurat_clusters == 5))
#cells0_6 <- Cells(subset(x = int_obj, subset = Day == "Day 0" & seurat_clusters == 6)) # no cells
day0 <- DimPlot(object = int_obj, cells.highlight = list(cells0_0, cells0_1, cells0_2, cells0_3, cells0_4, cells0_5), cols.highlight = c("#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Time-course (day 0)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
day0
# highlight ipsc-cms day 1 clusters
cells1_0 <- Cells(subset(x = int_obj, subset = Day == "Day 1" & seurat_clusters == 0))
cells1_1 <- Cells(subset(x = int_obj, subset = Day == "Day 1" & seurat_clusters == 1))
cells1_2 <- Cells(subset(x = int_obj, subset = Day == "Day 1" & seurat_clusters == 2))
cells1_3 <- Cells(subset(x = int_obj, subset = Day == "Day 1" & seurat_clusters == 3))
cells1_4 <- Cells(subset(x = int_obj, subset = Day == "Day 1" & seurat_clusters == 4))
cells1_5 <- Cells(subset(x = int_obj, subset = Day == "Day 1" & seurat_clusters == 5))
#cells1_6 <- Cells(subset(x = int_obj, subset = Day == "Day 1" & seurat_clusters == 6)) # no cells
day1 <- DimPlot(object = int_obj, cells.highlight = list(cells1_0, cells1_1, cells1_2, cells1_3, cells1_4, cells1_5), cols.highlight = c("#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Time-course (day 1)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
day1
# highlight ipsc-cms day 3 clusters
cells3_0 <- Cells(subset(x = int_obj, subset = Day == "Day 3" & seurat_clusters == 0))
cells3_1 <- Cells(subset(x = int_obj, subset = Day == "Day 3" & seurat_clusters == 1))
cells3_2 <- Cells(subset(x = int_obj, subset = Day == "Day 3" & seurat_clusters == 2))
cells3_3 <- Cells(subset(x = int_obj, subset = Day == "Day 3" & seurat_clusters == 3))
cells3_4 <- Cells(subset(x = int_obj, subset = Day == "Day 3" & seurat_clusters == 4))
cells3_5 <- Cells(subset(x = int_obj, subset = Day == "Day 3" & seurat_clusters == 5))
#cells3_6 <- Cells(subset(x = int_obj, subset = Day == "Day 3" & seurat_clusters == 6)) # no cells
day3 <- DimPlot(object = int_obj, cells.highlight = list(cells3_0, cells3_1, cells3_2, cells3_3, cells3_4, cells3_5), cols.highlight = c("#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Time-course (day 3)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
day3
# highlight ipsc-cms day 5 clusters
cells5_0 <- Cells(subset(x = int_obj, subset = Day == "Day 5" & seurat_clusters == 0))
cells5_1 <- Cells(subset(x = int_obj, subset = Day == "Day 5" & seurat_clusters == 1))
cells5_2 <- Cells(subset(x = int_obj, subset = Day == "Day 5" & seurat_clusters == 2))
cells5_3 <- Cells(subset(x = int_obj, subset = Day == "Day 5" & seurat_clusters == 3))
cells5_4 <- Cells(subset(x = int_obj, subset = Day == "Day 5" & seurat_clusters == 4))
cells5_5 <- Cells(subset(x = int_obj, subset = Day == "Day 5" & seurat_clusters == 5))
cells5_6 <- Cells(subset(x = int_obj, subset = Day == "Day 5" & seurat_clusters == 6))
day5 <- DimPlot(object = int_obj, cells.highlight = list(cells5_0, cells5_1, cells5_2, cells5_3, cells5_4, cells5_5, cells5_6), cols.highlight = c("#AA4499", "#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Time-course (day 5)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
day5
# highlight ipsc-cms day 7 clusters
cells7_0 <- Cells(subset(x = int_obj, subset = Day == "Day 7" & seurat_clusters == 0))
cells7_1 <- Cells(subset(x = int_obj, subset = Day == "Day 7" & seurat_clusters == 1))
cells7_2 <- Cells(subset(x = int_obj, subset = Day == "Day 7" & seurat_clusters == 2))
cells7_3 <- Cells(subset(x = int_obj, subset = Day == "Day 7" & seurat_clusters == 3))
cells7_4 <- Cells(subset(x = int_obj, subset = Day == "Day 7" & seurat_clusters == 4))
cells7_5 <- Cells(subset(x = int_obj, subset = Day == "Day 7" & seurat_clusters == 5))
cells7_6 <- Cells(subset(x = int_obj, subset = Day == "Day 7" & seurat_clusters == 6))
day7 <- DimPlot(object = int_obj, cells.highlight = list(cells7_0, cells7_1, cells7_2, cells7_3, cells7_4, cells7_5, cells7_6), cols.highlight = c("#AA4499", "#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Time-course (day 7)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
day7
# highlight ipsc-cms day 11 clusters
cells11_0 <- Cells(subset(x = int_obj, subset = Day == "Day 11" & seurat_clusters == 0))
cells11_1 <- Cells(subset(x = int_obj, subset = Day == "Day 11" & seurat_clusters == 1))
cells11_2 <- Cells(subset(x = int_obj, subset = Day == "Day 11" & seurat_clusters == 2))
cells11_3 <- Cells(subset(x = int_obj, subset = Day == "Day 11" & seurat_clusters == 3))
cells11_4 <- Cells(subset(x = int_obj, subset = Day == "Day 11" & seurat_clusters == 4))
cells11_5 <- Cells(subset(x = int_obj, subset = Day == "Day 11" & seurat_clusters == 5))
cells11_6 <- Cells(subset(x = int_obj, subset = Day == "Day 11" & seurat_clusters == 6))
day11 <- DimPlot(object = int_obj, cells.highlight = list(cells11_0, cells11_1, cells11_2, cells11_3, cells11_4, cells11_5, cells11_6), cols.highlight = c("#AA4499", "#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Time-course (day 11)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
day11
# highlight ipsc-cms day 15 clusters
cells15_0 <- Cells(subset(x = int_obj, subset = Day == "Day 15" & seurat_clusters == 0))
cells15_1 <- Cells(subset(x = int_obj, subset = Day == "Day 15" & seurat_clusters == 1))
cells15_2 <- Cells(subset(x = int_obj, subset = Day == "Day 15" & seurat_clusters == 2))
cells15_3 <- Cells(subset(x = int_obj, subset = Day == "Day 15" & seurat_clusters == 3))
cells15_4 <- Cells(subset(x = int_obj, subset = Day == "Day 15" & seurat_clusters == 4))
cells15_5 <- Cells(subset(x = int_obj, subset = Day == "Day 15" & seurat_clusters == 5))
cells15_6 <- Cells(subset(x = int_obj, subset = Day == "Day 15" & seurat_clusters == 6))
day15 <- DimPlot(object = int_obj, cells.highlight = list(cells15_1, cells15_2, cells15_3, cells15_4, cells15_5, cells15_6), cols.highlight = c("#AA4499", "#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Time-course (day 15)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
day15
# guided differentiation culture referred to as cardiac EB as a variable
# highlight cardiac EB clusters
cellsEB_0 <- Cells(subset(x = int_obj, subset = Day == "EB" & seurat_clusters == 0))
cellsEB_1 <- Cells(subset(x = int_obj, subset = Day == "EB" & seurat_clusters == 1))
cellsEB_2 <- Cells(subset(x = int_obj, subset = Day == "EB" & seurat_clusters == 2))
cellsEB_3 <- Cells(subset(x = int_obj, subset = Day == "EB" & seurat_clusters == 3))
cellsEB_4 <- Cells(subset(x = int_obj, subset = Day == "EB" & seurat_clusters == 4))
cellsEB_5 <- Cells(subset(x = int_obj, subset = Day == "EB" & seurat_clusters == 5))
cellsEB_6 <- Cells(subset(x = int_obj, subset = Day == "EB" & seurat_clusters == 6))
EB <- DimPlot(object = int_obj, cells.highlight = list(cellsEB_0, cellsEB_1, cellsEB_2, cellsEB_3, cellsEB_4, cellsEB_5, cellsEB_6), cols.highlight = c("#AA4499", "#332288", "#44AA99", "#999933", "#CC6677", "#882255", "#117733"), cols = "#DDDDDD", order = TRUE, label = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) + ggtitle("Guided differentiation (single collection)") + theme(plot.title = element_text(hjust = 0.5, size = 11), panel.border = element_rect_round(radius = unit(0.1, "snpc"), color = "black", fill=NA)) + NoLegend() + NoAxes()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
EB
# setup layout for printing multiple plots
p1 <- ggplot(mtcars) + geom_point(aes(mpg, disp))
p2 <- ggplot(mtcars) + geom_boxplot(aes(gear, disp, group = gear))
p3 <- ggplot(mtcars) + geom_bar(aes(gear)) + facet_wrap(~cyl)
umap_layout <- c(area(1, 1), area(1, 2), area(1, 3), area(2, 1), area(2, 2), area(2, 3), area(3, 1), area(3, 2, 4, 3))
plot(umap_layout)
# plot each ipsc-cm day and the cardiac EB
p <- day0 + day1 + day3 + day5 + day7 + day11 + day15 + EB + plot_layout(design = umap_layout)
p
# plot UMAP entire integrated dataset
p <- DimPlot(int_obj, reduction = "umap", cols = my_seven_colors, order = FALSE, raster = TRUE, raster.dpi = c(1024, 1024)) +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
p
# NOTE: while UMAP embedding and clustering are done with the integrated assay, the corrected values are no longer reliable as the quantitative measure of gene expression; for performing differential expression after integration, switch back to the original data
DefaultAssay(int_obj) <- "SCT"
# plot marker gene expression by cluster and split by origin
features <- c("KDR",
"APOA1",
"FOXA2",
"CRABP2",
"POU5F1",
"CCND1",
"LUM",
"COL3A1",
"TNNT2",
"TTN")
p <- DotPlot(int_obj, features = features, cols = c("#0077BB", "#EE7733"), split.by = "Origin", dot.scale = 8) + coord_flip() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + theme(axis.title.y = element_blank(), axis.title.x = element_blank())
p
FIGURE 3 PLOTTING OF SCPRED / AUTOMATED CELL ANNOTATION
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
# add origin info to obj metadata
Miao_2020_reference <- AddMetaData(Miao_2020_reference, metadata="Miao_2020_fetal_heart", col.name="Origin")
# normalization, variance stabilization, and dimensionality reduction for reference
Miao_2020_reference <- SCTransform(Miao_2020_reference, variable.features.n = 5000, verbose = FALSE)
Miao_2020_reference <- RunPCA(Miao_2020_reference, npcs = 50, verbose = FALSE)
Miao_2020_reference <- RunUMAP(Miao_2020_reference, dims = 1:50, verbose = FALSE, return.model = TRUE)
# break
Miao_2020_reference <- FindNeighbors(Miao_2020_reference, dims = 1:50, verbose = FALSE)
# train classifiers with scPred
Miao_2020_reference <- getFeatureSpace(Miao_2020_reference, "Cell_type")
## ● Extracting feature space for each cell type...
## DONE!
Miao_2020_reference <- trainModel(Miao_2020_reference)
## ● Training models for each cell type...
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:CellChat':
##
## dotPlot
## The following object is masked from 'package:purrr':
##
## lift
## DONE!
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
# guided differentiation culture referred to as cardiac EB as a variable
cardiac_EB <- CreateSeuratObject(cardiac_EB)
# add vireo information (demultiplex to identify individuals)
add_vireo <- function(cardiac_EB, dir) {
donor_ids <- read.table(paste0(dir, 'vireo/donor_ids.tsv'), header = TRUE, stringsAsFactors = FALSE)
cardiac_EB <- AddMetaData(cardiac_EB, donor_ids$donor_id, col.name ='vireo.individual')
cardiac_EB <- AddMetaData(cardiac_EB, donor_ids$prob_max, col.name ='vireo.prob.singlet')
ambient.file <- paste0(dir, 'vireo/prop_ambient.tsv')
if (file.exists(ambient.file)) {
prop <- read.table(paste0(dir, 'vireo/prop_ambient.tsv'), header = TRUE, stringsAsFactors = FALSE)
inds <- unique(donor_ids$best_singlet)
donor_ids$prop_donor <- 0
for (i in 1:length(inds)) {
w <- which(donor_ids$best_singlet==inds[i])
donor_ids$prop_donor[w] <- prop[w,inds[i]]
}
cardiac_EB <- AddMetaData(cardiac_EB, donor_ids$prop_donor, col.name ='vireo.prop.donor')
}
cardiac_EB
}
cardiac_EB <- add_vireo(cardiac_EB, dir)
individual.colname <- ('vireo.individual')
individual.colname <- individual.colname[which(individual.colname %in% colnames(cardiac_EB@meta.data))][1]
# filter cardiac EB for assigned singlets containing ≥1,500 features per cell
cardiac_EB <- subset(x = cardiac_EB, subset = vireo.individual == "doublet", invert = TRUE)
cardiac_EB <- subset(x = cardiac_EB, subset = vireo.individual == "unassigned", invert = TRUE)
cardiac_EB <- subset(cardiac_EB, subset = nFeature_RNA >= 1500)
# normalization and add meta data
cardiac_EB <- SCTransform(cardiac_EB, variable.features.n = 5000, verbose = FALSE)
cardiac_EB <- AddMetaData(cardiac_EB, metadata="day_7_cardiacEB", col.name="Origin")
# filtering, normalization and add meta data
day_100_organoid <- subset(day_100_organoid, subset = nFeature_RNA >= 1500)
day_100_organoid <- SCTransform(day_100_organoid, variable.features.n = 5000, verbose = FALSE)
day_100_organoid <- AddMetaData(day_100_organoid, metadata="multi-lineage organoid", col.name="Origin")
pbmc <- SCTransform(pbmc, variable.features.n = 5000, verbose = FALSE)
pbmc <- AddMetaData(pbmc, metadata="PBMCs", col.name="Origin")
# classify query cells
cardiac_EB <- scPredict(cardiac_EB, Miao_2020_reference, threshold = 0.9)
## ● Matching reference with new dataset...
## ─ 5000 features present in reference loadings
## ─ 3206 features shared between reference and new dataset
## ─ 64.12% of features in the reference are present in new dataset
## ● Aligning new data to reference...
## Warning: HarmonyMatrix is deprecated and will be removed in the future from the
## API in the future
## Warning: Warning: The parameters do_pca and npcs are deprecated. They will be ignored for this function call and please remove parameters do_pca and npcs and pass to harmony cell_embeddings directly.
## This warning is displayed once per session.
## Warning: Warning: The parameter max.iter.harmony is replaced with parameter max_iter. It will be ignored for this function call and please use parameter max_iter in future function calls.
## This warning is displayed once per session.
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony converged after 5 iterations
## ● Classifying cells...
## DONE!
day_100_organoid <- scPredict(day_100_organoid, Miao_2020_reference, threshold = 0.9)
## ● Matching reference with new dataset...
## ─ 5000 features present in reference loadings
## ─ 3371 features shared between reference and new dataset
## ─ 67.42% of features in the reference are present in new dataset
## ● Aligning new data to reference...
## Warning: HarmonyMatrix is deprecated and will be removed in the future from the
## API in the future
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony converged after 3 iterations
## ● Classifying cells...
## DONE!
pbmc <- scPredict(pbmc, Miao_2020_reference, threshold = 0.9)
## ● Matching reference with new dataset...
## ─ 5000 features present in reference loadings
## ─ 2603 features shared between reference and new dataset
## ─ 52.06% of features in the reference are present in new dataset
## ● Aligning new data to reference...
## Warning: HarmonyMatrix is deprecated and will be removed in the future from the
## API in the future
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony converged after 2 iterations
## ● Classifying cells...
## DONE!
# run UMAP using only scPred classified cells (unassigned cells not plotted)
sub_cardiac_EB <- subset(x = cardiac_EB, subset = scpred_prediction == "unassigned", invert = TRUE)
sub_cardiac_EB <- RunUMAP(sub_cardiac_EB, reduction = "scpred", dims = 1:50)
## 13:31:00 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:31:00 Read 1114 rows and found 50 numeric columns
## 13:31:00 Using Annoy for neighbor search, n_neighbors = 30
## 13:31:00 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:31:00 Writing NN index file to temp file /tmp/Rtmp8VSCFo/file2ed2b604bc265
## 13:31:00 Searching Annoy index using 1 thread, search_k = 3000
## 13:31:00 Annoy recall = 100%
## 13:31:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:31:03 Found 2 connected components, falling back to 'spca' initialization with init_sdev = 1
## 13:31:03 Using 'irlba' for PCA
## 13:31:04 PCA: 2 components explained 58.91% variance
## 13:31:04 Scaling init to sdev = 1
## 13:31:04 Commencing optimization for 500 epochs, with 46946 positive edges
## 13:31:08 Optimization finished
sub_day_100_organoid <- subset(x = day_100_organoid, subset = scpred_prediction == "unassigned", invert = TRUE)
sub_day_100_organoid <- RunUMAP(sub_day_100_organoid, reduction = "scpred", dims = 1:50)
## 13:31:09 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:31:09 Read 2016 rows and found 50 numeric columns
## 13:31:09 Using Annoy for neighbor search, n_neighbors = 30
## 13:31:09 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:31:09 Writing NN index file to temp file /tmp/Rtmp8VSCFo/file2ed2b2835a65
## 13:31:09 Searching Annoy index using 1 thread, search_k = 3000
## 13:31:10 Annoy recall = 100%
## 13:31:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:31:13 Initializing from normalized Laplacian + noise (using RSpectra)
## 13:31:13 Commencing optimization for 500 epochs, with 89398 positive edges
## 13:31:20 Optimization finished
sub_pbmc <- subset(x = pbmc, subset = scpred_prediction == "unassigned", invert = TRUE)
sub_pbmc <- RunUMAP(sub_pbmc, reduction = "scpred", dims = 1:50)
## 13:31:26 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:31:26 Read 22733 rows and found 50 numeric columns
## 13:31:26 Using Annoy for neighbor search, n_neighbors = 30
## 13:31:26 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:31:29 Writing NN index file to temp file /tmp/Rtmp8VSCFo/file2ed2ba177b4e
## 13:31:29 Searching Annoy index using 1 thread, search_k = 3000
## 13:31:37 Annoy recall = 100%
## 13:31:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:31:40 Initializing from normalized Laplacian + noise (using RSpectra)
## 13:31:41 Commencing optimization for 200 epochs, with 999644 positive edges
## 13:32:11 Optimization finished
# colors for UMAP plots
colors <- brewer.pal(n = 11, name = 'Paired')
# plot UMAPs
ref_plot <- DimPlot(object = Miao_2020_reference, reduction = "umap", group.by = "Cell_type", label = FALSE, repel = FALSE, cols = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A", "#FFFF99")) +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2") +
ggtitle(NULL) +
NoLegend()
guided_plot <- DimPlot(sub_cardiac_EB, group.by = "scpred_prediction", label = FALSE, repel = FALSE, cols = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A")) +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2") +
ggtitle(NULL) +
NoLegend()
organoid_plot <- DimPlot(sub_day_100_organoid, group.by = "scpred_prediction", label = FALSE, repel = FALSE, cols = c("#A6CEE3", "#1F78B4", "#33A02C", "#E31A1C", "#FDBF6F", "#CAB2D6", "#6A3D9A", "#FFFF99")) +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2") +
ggtitle(NULL) +
NoLegend()
# setup layout for printing multiple plots
umaps_layout <- c(area(1, 1), area(2, 1), area(2, 2))
p <- ref_plot + guided_plot + organoid_plot + plot_layout(design = umaps_layout)
p
# plot PBMCs UMAP for supplement
p <- DimPlot(sub_pbmc, group.by = "scpred_prediction", label = FALSE, repel = FALSE, cols = c("#FDBF6F")) +
theme(axis.title.y = element_text(size = 8, color = "black"),
axis.ticks.y=element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8, colour = "black"),
axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
panel.background = element_blank()) +
xlab("UMAP 1") + ylab("UMAP 2") +
ggtitle(NULL)
p
## Saving 7 x 5 in image
print(sessionInfo())
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /software/openblas-0.3.13-el7-x86_64/lib/libopenblas_haswellp-r0.3.13.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=C
## [4] LC_COLLATE=C LC_MONETARY=C LC_MESSAGES=C
## [7] LC_PAPER=C LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] caret_6.0-92 lattice_0.20-45 scPred_1.9.2
## [4] SeuratObject_4.1.3 Seurat_4.3.0 CellChat_2.0.0
## [7] igraph_1.3.1 ggalluvial_0.12.5 NMF_0.26
## [10] Biobase_2.56.0 BiocGenerics_0.42.0 cluster_2.1.3
## [13] rngtools_1.5.2 registry_0.5-1 RColorBrewer_1.1-3
## [16] elementalist_0.0.0.9000 patchwork_1.1.1 ggrepel_0.9.4
## [19] cowplot_1.1.1 lubridate_1.9.3 forcats_1.0.0
## [22] stringr_1.5.0 dplyr_1.1.3 purrr_1.0.2
## [25] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [28] ggplot2_3.4.4 tidyverse_2.0.0 Matrix_1.5-3
## [31] data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 R.utils_2.11.0 spatstat.explore_3.0-6
## [4] reticulate_1.24 tidyselect_1.2.0 htmlwidgets_1.5.4
## [7] grid_4.2.0 BiocParallel_1.30.3 Rtsne_0.16
## [10] pROC_1.18.0 munsell_0.5.0 codetools_0.2-18
## [13] ica_1.0-2 future_1.25.0 miniUI_0.1.1.1
## [16] withr_2.5.0 spatstat.random_3.1-3 colorspace_2.0-3
## [19] progressr_0.10.0 highr_0.9 knitr_1.39
## [22] rstudioapi_0.15.0 stats4_4.2.0 ROCR_1.0-11
## [25] ggsignif_0.6.3 tensor_1.5 listenv_0.8.0
## [28] labeling_0.4.2 harmony_1.1.0 polyclip_1.10-0
## [31] farver_2.1.0 coda_0.19-4 parallelly_1.31.1
## [34] vctrs_0.6.4 generics_0.1.2 ipred_0.9-12
## [37] xfun_0.30 timechange_0.2.0 R6_2.5.1
## [40] doParallel_1.0.17 ggbeeswarm_0.7.2 clue_0.3-61
## [43] spatstat.utils_3.0-1 promises_1.2.0.1 scales_1.2.0
## [46] nnet_7.3-17 beeswarm_0.4.0 gtable_0.3.0
## [49] globals_0.15.0 goftest_1.2-3 timeDate_3043.102
## [52] rlang_1.1.2 systemfonts_1.0.4 GlobalOptions_0.1.2
## [55] splines_4.2.0 rstatix_0.7.2 lazyeval_0.2.2
## [58] ModelMetrics_1.2.2.2 spatstat.geom_3.0-6 broom_1.0.5
## [61] BiocManager_1.30.18 yaml_2.3.5 reshape2_1.4.4
## [64] abind_1.4-5 ggnetwork_0.5.12 backports_1.4.1
## [67] httpuv_1.6.5 lava_1.6.10 tools_4.2.0
## [70] gridBase_0.4-7 statnet.common_4.6.0 ellipsis_0.3.2
## [73] jquerylib_0.1.4 ggridges_0.5.3 Rcpp_1.0.8.3
## [76] plyr_1.8.7 rpart_4.1.16 ggpubr_0.6.0
## [79] deldir_1.0-6 pbapply_1.5-0 GetoptLong_1.0.5
## [82] S4Vectors_0.34.0 zoo_1.8-10 magrittr_2.0.3
## [85] RSpectra_0.16-1 sna_2.7-1 scattermore_0.8
## [88] circlize_0.4.15 lmtest_0.9-40 RANN_2.6.1
## [91] fitdistrplus_1.1-8 matrixStats_0.62.0 hms_1.1.3
## [94] mime_0.12 evaluate_0.15 xtable_1.8-4
## [97] RhpcBLASctl_0.23-42 IRanges_2.30.0 gridExtra_2.3
## [100] shape_1.4.6 compiler_4.2.0 KernSmooth_2.23-20
## [103] crayon_1.5.1 R.oo_1.24.0 htmltools_0.5.2
## [106] later_1.3.0 tzdb_0.4.0 DBI_1.1.3
## [109] ComplexHeatmap_2.15.4 MASS_7.3-56 car_3.1-1
## [112] cli_3.6.1 R.methodsS3_1.8.1 gower_1.0.0
## [115] parallel_4.2.0 pkgconfig_2.0.3 sp_1.6-0
## [118] recipes_0.2.0 plotly_4.10.0 spatstat.sparse_3.0-0
## [121] foreach_1.5.2 svglite_2.1.0 vipor_0.4.5
## [124] hardhat_0.2.0 bslib_0.3.1 prodlim_2019.11.13
## [127] digest_0.6.29 sctransform_0.3.5 RcppAnnoy_0.0.19
## [130] spatstat.data_3.0-0 rmarkdown_2.14 leiden_0.4.2
## [133] uwot_0.1.14 kernlab_0.9-30 shiny_1.7.1
## [136] rjson_0.2.21 nlme_3.1-157 lifecycle_1.0.4
## [139] jsonlite_1.8.7 carData_3.0-5 network_1.17.1
## [142] BiocNeighbors_1.14.0 viridisLite_0.4.0 fansi_1.0.3
## [145] pillar_1.9.0 fastmap_1.1.0 httr_1.4.7
## [148] survival_3.3-1 glue_1.6.2 FNN_1.1.3
## [151] png_0.1-7 iterators_1.0.14 class_7.3-20
## [154] presto_1.0.0 stringi_1.7.6 sass_0.4.1
## [157] irlba_2.3.5 future.apply_1.9.0